########################################################################################################################################
## ECONOMIC SHOCKS & ISSUE IMPORTANCE                                                                                                 ##
## Author:  Sarah Gomm, based on Hanretty et al. 2020 https://dataverse.harvard.edu/dataset.xhtml?persistentId=doi:10.7910/DVN/0EIQXL ##
## Date:    February 2023                                                                                                             ##
## Data:    SEP Wave 6                                                                                                                ##
########################################################################################################################################


rm(list=ls())
# Check the username and set the working directory   ---------------------------
# Get the username 
username <- Sys.getenv("USERNAME")

if (username == "FILL IN YOUR USERNAME") {
  setwd("FILL IN YOUR WORKING DIRECTORY")
} else {
  stop("Unknown user. Please check your username.")
}

# Load Stan
library(rstan)


###############
# Load cleaned data objects

load("Surveydata_fullsample.Rdata")
N_items <- 16
N_respondents <- nrow(svyclean)

####################
### Set up data for conjoint model

# check that all menu items have same levels
apply(sapply(conjoint_menus,levels),1,unique)

# create lookup table for conjoint menu items
conjoint_menu_levels <- data.frame(
    item = rep(1:N_items,each=4),
    level = rep(1:4,times=N_items),
    levels = levels(conjoint_menus$Manifesto_2_2_A_t1)
)

# convert conjoint menu data into array
conjoint_menus_numeric_wide <- sapply(conjoint_menus,as.numeric)

conjoint_menus_itemIDs <- apply(conjoint_menus_numeric_wide,2,function(x) conjoint_menu_levels$item[x])
conjoint_menus_levelIDs <- apply(conjoint_menus_numeric_wide,2,function(x) conjoint_menu_levels$level[x])

conjoint_menus_itemIDs_array <- array(conjoint_menus_itemIDs,
                                      dim=c(N_respondents,2,3,4),
                                      dimnames=list(Respondent=1:N_respondents,Alternative=c("A","B"),MenuPosition=1:3,Round=1:4))

conjoint_menus_levelIDs_array <- array(conjoint_menus_levelIDs,
                                       dim=c(N_respondents,2,3,4),
                                       dimnames=list(Respondent=1:N_respondents,Alternative=c("A","B"),MenuPosition=1:3,Round=1:4))

# lookup respondents positions for each item
ordinal_responses_numeric <- sapply(ordinal_responses,as.numeric)
conjoint_menus_positionIDs_array <- array(NA,
                                          dim=c(N_respondents,2,3,4),
                                          dimnames=list(Respondent=1:N_respondents,Alternative=c("A","B"),MenuPosition=1:3,Round=1:4))
for (i in 1:N_respondents) for (j in 1:3) {
    conjoint_menus_positionIDs_array[i,,j,] <- 
    ordinal_responses_numeric[i,conjoint_menus_itemIDs_array[i,1,j,1]]
    if (is.na(conjoint_menus_positionIDs_array[i,1,j,1] )) {
        print(paste("Linking error at respondent",i,"menu item",j))
    }    
}

conjoint_responses_wide <- sapply(conjoint_responses,as.numeric) - 1

# number of differing positions for each conjoint comparison, to allow estimation of different response thresholds

differing_positions_count <- apply(conjoint_menus_levelIDs_array,c(1,4),function(x) sum(x[1,] != x[2,]))

# set up covariates for respondent-variation in the weights on different issues

conjoint_scaling_data <- list(
    Y = conjoint_responses_wide, #1=Candidate A, 2= Candidate B
    D = differing_positions_count, #1,2, or 3 differing positions
    itemID = conjoint_menus_itemIDs_array, #which item in which round on which position (1-16)
    candidatepositionID = conjoint_menus_levelIDs_array, #position of candidate A & B in round and menu item (1-4)
    respondentpositionID = conjoint_menus_positionIDs_array, #position of respondent on items in conjoint (1-4)
    ordinal_responses = replace(ordinal_responses_numeric,is.na(ordinal_responses_numeric),0), #0-4: respondent positions
    N_menulength = 3,
    N_itemalts = 4,
    N_items = N_items,
    N_rounds = ncol(conjoint_responses_wide),
    N_respondents = nrow(conjoint_responses_wide)
)

###############
# Stan code for conjoint analysis

conjoint_code <- '

data {

    int<lower = 1> N_respondents; // Number of respondents 
    int<lower = 1> N_rounds; // Number of rounds per respondent 
    int<lower = 1> N_itemalts; // Number of policy alternatives 
    int<lower = 1> N_menulength; // Number of issues per conjoint 
    int<lower = 1> N_items; // Number of issues 
    
    int<lower = 0,upper=1> Y[N_respondents,N_rounds]; // Matrix of conjoint response data
    int<lower = 1,upper=3> D[N_respondents,N_rounds]; // How many differing positions were there for each conjoint comparison?
    int<lower = 1,upper=N_items> itemID[N_respondents,2,N_menulength,N_rounds];
    int<lower = 1,upper=N_itemalts> candidatepositionID[N_respondents,2,N_menulength,N_rounds];
    int<lower = 1,upper=N_itemalts> respondentpositionID[N_respondents,2,N_menulength,N_rounds];

    int<lower = 0,upper=4> ordinal_responses[N_respondents,N_items];

   
}

parameters {

    vector<lower=0>[N_itemalts-1] psi_std[N_items]; // positions of alternatives on each issue
    real gamma[3]; // cutpoints for choosing A vs B if 1/2/3 positions differ

    simplex[4] pi[N_items]; // estimated population proportions for calculating importance score
    
}

transformed parameters {

    vector[N_itemalts] psi[N_items]; // positions of alternatives on each issue
    real mu[N_respondents,N_rounds,2]; // utility of A, utility of B
    real delta[N_respondents,N_rounds]; // utility difference

    for (j in 1:N_items) psi[j] = cumulative_sum(append_row(0.0, psi_std[j])); 

    for (i in 1:N_respondents){
        for (k in 1:N_rounds){
            mu[i,k,1] = -1.0*(
                        fabs(psi[itemID[i,1,1,k],candidatepositionID[i,1,1,k]] - psi[itemID[i,1,1,k],respondentpositionID[i,1,1,k]]) + 
                        fabs(psi[itemID[i,1,2,k],candidatepositionID[i,1,2,k]] - psi[itemID[i,1,2,k],respondentpositionID[i,1,2,k]]) + 
                        fabs(psi[itemID[i,1,3,k],candidatepositionID[i,1,3,k]] - psi[itemID[i,1,3,k],respondentpositionID[i,1,3,k]])
                        );
            mu[i,k,2] = -1.0*(
                        fabs(psi[itemID[i,2,1,k],candidatepositionID[i,2,1,k]] - psi[itemID[i,2,1,k],respondentpositionID[i,2,1,k]]) + 
                        fabs(psi[itemID[i,2,2,k],candidatepositionID[i,2,2,k]] - psi[itemID[i,2,2,k],respondentpositionID[i,2,2,k]]) + 
                        fabs(psi[itemID[i,2,3,k],candidatepositionID[i,2,3,k]] - psi[itemID[i,2,3,k],respondentpositionID[i,2,3,k]])
                        );
            delta[i,k] = mu[i,k,2] - mu[i,k,1];
        }
    }

}

model {

    // quasi-likelihood for conjoint responses
    for (i in 1:N_respondents){
        for (k in 1:N_rounds){
            target += bernoulli_logit_lpmf(Y[i,k] | delta[i,k] - gamma[D[i,k]]);
        }
    }

    // quasi-likelihood for ordinal responses for computing population proportions
    for (i in 1:N_respondents){
        for (k in 1:N_items){
            if (ordinal_responses[i,k] != 0) target += categorical_lpmf(ordinal_responses[i,k]|pi[k]);
        }
    }


}

generated quantities {

    real<lower=0> chi[N_items];

    // Calculate importance score
    for (j in 1:N_items){
        chi[j] = 0;
        for (k1 in 1:4){
            for (k2 in 1:4) chi[j] = chi[j] + pi[j,k1]*pi[j,k2]*fabs(psi[j,k1]-psi[j,k2]);
        }
    }

}
'


rstan_options(auto_write = TRUE)
options(mc.cores = 2)
Sys.setenv(OPENBLAS_NUM_THREADS = 1)
    
posterior <- stan(
    model_code = conjoint_code, 
    data = conjoint_scaling_data,
    seed = 1221,
    pars=c(
        "psi","gamma","pi","chi"
    ), 
    iter = 2500,
    warmup = 500, 
    chains = 2,
    refresh = 1,
    cores = 2,
    control = list(max_treedepth = 15, adapt_delta = 0.99)
)  
    
save(posterior,conjoint_scaling_data,file="Posterior_fullsample.Rdata")



